1   package org.apache.lucene.search.join;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import org.apache.lucene.index.DocValues;
21  import org.apache.lucene.index.LeafReaderContext;
22  import org.apache.lucene.index.MultiDocValues;
23  import org.apache.lucene.index.SortedDocValues;
24  import org.apache.lucene.search.Collector;
25  import org.apache.lucene.search.LeafCollector;
26  import org.apache.lucene.search.Scorer;
27  import org.apache.lucene.util.LongBitSet;
28  import org.apache.lucene.util.LongValues;
29  
30  import java.io.IOException;
31  import java.util.Arrays;
32  
33  abstract class GlobalOrdinalsWithScoreCollector implements Collector {
34  
35    final String field;
36    final boolean doMinMax;
37    final int min;
38    final int max;
39    final MultiDocValues.OrdinalMap ordinalMap;
40    final LongBitSet collectedOrds;
41  
42    protected final Scores scores;
43    protected final Occurrences occurrences;
44  
45    GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) {
46      if (valueCount > Integer.MAX_VALUE) {
47        // We simply don't support more than
48        throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
49      }
50      this.field = field;
51      this.doMinMax = !(min <= 0 && max == Integer.MAX_VALUE);
52      this.min = min;
53      this.max = max;;
54      this.ordinalMap = ordinalMap;
55      this.collectedOrds = new LongBitSet(valueCount);
56      if (scoreMode != ScoreMode.None) {
57        this.scores = new Scores(valueCount, unset());
58      } else {
59        this.scores = null;
60      }
61      if (scoreMode == ScoreMode.Avg || doMinMax) {
62        this.occurrences = new Occurrences(valueCount);
63      } else {
64        this.occurrences = null;
65      }
66    }
67  
68    public boolean match(int globalOrd) {
69      if (collectedOrds.get(globalOrd)) {
70        if (doMinMax) {
71          final int occurrence = occurrences.getOccurrence(globalOrd);
72          return occurrence >= min && occurrence <= max;
73        } else {
74          return true;
75        }
76      }
77      return false;
78    }
79  
80    public float score(int globalOrdinal) {
81      return scores.getScore(globalOrdinal);
82    }
83  
84    protected abstract void doScore(int globalOrd, float existingScore, float newScore);
85  
86    protected abstract float unset();
87  
88    @Override
89    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
90      SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
91      if (ordinalMap != null) {
92        LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
93        return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
94      } else {
95        return new SegmentOrdinalCollector(docTermOrds);
96      }
97    }
98  
99    @Override
100   public boolean needsScores() {
101     return true;
102   }
103 
104   final class OrdinalMapCollector implements LeafCollector {
105 
106     private final SortedDocValues docTermOrds;
107     private final LongValues segmentOrdToGlobalOrdLookup;
108     private Scorer scorer;
109 
110     OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) {
111       this.docTermOrds = docTermOrds;
112       this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
113     }
114 
115     @Override
116     public void collect(int doc) throws IOException {
117       final long segmentOrd = docTermOrds.getOrd(doc);
118       if (segmentOrd != -1) {
119         final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
120         collectedOrds.set(globalOrd);
121         float existingScore = scores.getScore(globalOrd);
122         float newScore = scorer.score();
123         doScore(globalOrd, existingScore, newScore);
124         if (occurrences != null) {
125           occurrences.increment(globalOrd);
126         }
127       }
128     }
129 
130     @Override
131     public void setScorer(Scorer scorer) throws IOException {
132       this.scorer = scorer;
133     }
134   }
135 
136   final class SegmentOrdinalCollector implements LeafCollector {
137 
138     private final SortedDocValues docTermOrds;
139     private Scorer scorer;
140 
141     SegmentOrdinalCollector(SortedDocValues docTermOrds) {
142       this.docTermOrds = docTermOrds;
143     }
144 
145     @Override
146     public void collect(int doc) throws IOException {
147       final int segmentOrd = docTermOrds.getOrd(doc);
148       if (segmentOrd != -1) {
149         collectedOrds.set(segmentOrd);
150         float existingScore = scores.getScore(segmentOrd);
151         float newScore = scorer.score();
152         doScore(segmentOrd, existingScore, newScore);
153         if (occurrences != null) {
154           occurrences.increment(segmentOrd);
155         }
156       }
157     }
158 
159     @Override
160     public void setScorer(Scorer scorer) throws IOException {
161       this.scorer = scorer;
162     }
163   }
164 
165   static final class Min extends GlobalOrdinalsWithScoreCollector {
166 
167     public Min(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
168       super(field, ordinalMap, valueCount, ScoreMode.Min, min, max);
169     }
170 
171     @Override
172     protected void doScore(int globalOrd, float existingScore, float newScore) {
173       scores.setScore(globalOrd, Math.min(existingScore, newScore));
174     }
175 
176     @Override
177     protected float unset() {
178       return Float.POSITIVE_INFINITY;
179     }
180   }
181 
182   static final class Max extends GlobalOrdinalsWithScoreCollector {
183 
184     public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
185       super(field, ordinalMap, valueCount, ScoreMode.Max, min, max);
186     }
187 
188     @Override
189     protected void doScore(int globalOrd, float existingScore, float newScore) {
190       scores.setScore(globalOrd, Math.max(existingScore, newScore));
191     }
192 
193     @Override
194     protected float unset() {
195       return Float.NEGATIVE_INFINITY;
196     }
197   }
198 
199   static final class Sum extends GlobalOrdinalsWithScoreCollector {
200 
201     public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
202       super(field, ordinalMap, valueCount, ScoreMode.Total, min, max);
203     }
204 
205     @Override
206     protected void doScore(int globalOrd, float existingScore, float newScore) {
207       scores.setScore(globalOrd, existingScore + newScore);
208     }
209 
210     @Override
211     protected float unset() {
212       return 0f;
213     }
214   }
215 
216   static final class Avg extends GlobalOrdinalsWithScoreCollector {
217 
218     public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
219       super(field, ordinalMap, valueCount, ScoreMode.Avg, min, max);
220     }
221 
222     @Override
223     protected void doScore(int globalOrd, float existingScore, float newScore) {
224       scores.setScore(globalOrd, existingScore + newScore);
225     }
226 
227     @Override
228     public float score(int globalOrdinal) {
229       return scores.getScore(globalOrdinal) / occurrences.getOccurrence(globalOrdinal);
230     }
231 
232     @Override
233     protected float unset() {
234       return 0f;
235     }
236   }
237 
238   static final class NoScore extends GlobalOrdinalsWithScoreCollector {
239 
240     public NoScore(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
241       super(field, ordinalMap, valueCount, ScoreMode.None, min, max);
242     }
243 
244     @Override
245     public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
246       final SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
247       if (ordinalMap != null) {
248         final LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
249         return new LeafCollector() {
250 
251           @Override
252           public void setScorer(Scorer scorer) throws IOException {
253           }
254 
255           @Override
256           public void collect(int doc) throws IOException {
257             final long segmentOrd = docTermOrds.getOrd(doc);
258             if (segmentOrd != -1) {
259               final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
260               collectedOrds.set(globalOrd);
261               occurrences.increment(globalOrd);
262             }
263           }
264         };
265       } else {
266         return new LeafCollector() {
267           @Override
268           public void setScorer(Scorer scorer) throws IOException {
269           }
270 
271           @Override
272           public void collect(int doc) throws IOException {
273             final int segmentOrd = docTermOrds.getOrd(doc);
274             if (segmentOrd != -1) {
275               collectedOrds.set(segmentOrd);
276               occurrences.increment(segmentOrd);
277             }
278           }
279         };
280       }
281     }
282 
283     @Override
284     protected void doScore(int globalOrd, float existingScore, float newScore) {
285     }
286 
287     @Override
288     public float score(int globalOrdinal) {
289       return 1f;
290     }
291 
292     @Override
293     protected float unset() {
294       return 0f;
295     }
296 
297     @Override
298     public boolean needsScores() {
299       return false;
300     }
301   }
302 
303   // Because the global ordinal is directly used as a key to a score we should be somewhat smart about allocation
304   // the scores array. Most of the times not all docs match so splitting the scores array up in blocks can prevent creation of huge arrays.
305   // Also working with smaller arrays is supposed to be more gc friendly
306   //
307   // At first a hash map implementation would make sense, but in the case that more than half of docs match this becomes more expensive
308   // then just using an array.
309 
310   // Maybe this should become a method parameter?
311   static final int arraySize = 4096;
312 
313   static final class Scores {
314 
315     final float[][] blocks;
316     final float unset;
317 
318     private Scores(long valueCount, float unset) {
319       long blockSize = valueCount + arraySize - 1;
320       blocks = new float[(int) ((blockSize) / arraySize)][];
321       this.unset = unset;
322     }
323 
324     public void setScore(int globalOrdinal, float score) {
325       int block = globalOrdinal / arraySize;
326       int offset = globalOrdinal % arraySize;
327       float[] scores = blocks[block];
328       if (scores == null) {
329         blocks[block] = scores = new float[arraySize];
330         if (unset != 0f) {
331           Arrays.fill(scores, unset);
332         }
333       }
334       scores[offset] = score;
335     }
336 
337     public float getScore(int globalOrdinal) {
338       int block = globalOrdinal / arraySize;
339       int offset = globalOrdinal % arraySize;
340       float[] scores = blocks[block];
341       float score;
342       if (scores != null) {
343         score = scores[offset];
344       } else {
345         score = unset;
346       }
347       return score;
348     }
349 
350   }
351 
352   static final class Occurrences {
353 
354     final int[][] blocks;
355 
356     private Occurrences(long valueCount) {
357       long blockSize = valueCount + arraySize - 1;
358       blocks = new int[(int) (blockSize / arraySize)][];
359     }
360 
361     public void increment(int globalOrdinal) {
362       int block = globalOrdinal / arraySize;
363       int offset = globalOrdinal % arraySize;
364       int[] occurrences = blocks[block];
365       if (occurrences == null) {
366         blocks[block] = occurrences = new int[arraySize];
367       }
368       occurrences[offset]++;
369     }
370 
371     public int getOccurrence(int globalOrdinal) {
372       int block = globalOrdinal / arraySize;
373       int offset = globalOrdinal % arraySize;
374       int[] occurrences = blocks[block];
375       return occurrences[offset];
376     }
377 
378   }
379 
380 }